Data Organization

# Label each row with its run name, looked up from the position of its file in
# `files` (defined outside this chunk), then give repeated occurrences of the
# same annotated sequence within a file a letter id so each can be told apart.
pepRaw <- pepRaw %>%
  mutate(run = c("ctrl", "a", "b", "c", "d", "e")[match(file, files)]) %>%
  group_by(file, `Annotated Sequence`) %>%
  mutate(pepID = letters[seq_len(n())]) %>%
  ungroup() %>%
  # Move the identifying columns to the front; keep everything else as is.
  select(file, run, pepID, everything())

# Reshape to long format: the columns `126` through `131` become
# label / intensity pairs, one row per peptide observation and channel.
pepTidy <- pepRaw %>%
  gather(key = label, value = intensity, "126":"131") %>%
  select(run, label, intensity, everything(), -file) %>%
  mutate(
    # Every channel of the control run is a control; in the other runs the
    # channel label decides the condition.
    condition = ifelse(
      run == "ctrl",
      "ctrl",
      c("ctrl", 0, 20, 40, 60, 80)[match(label, c(126:131))]
    ),
    intensity = as.numeric(intensity)
  ) %>%
  # peptide = annotated sequence joined with its per-file letter id.
  unite(peptide, c(`Annotated Sequence`, "pepID")) %>%
  rename(Uniprot = `Master Protein Accessions`) %>%
  # cell = run, label and condition joined; the source columns are kept.
  unite(cell, c("run", "label", "condition"), remove = FALSE) %>%
  select(cell, run, label, condition, peptide, Uniprot, intensity)

Variance and Co-variance Between Cells Before Normalization

# Density of log2 intensity, one curve per cell, before any normalization.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

The peptide intensities appear to follow a log-normal distribution.

# Wide table of the control samples: one row per peptide, one column per cell.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise comparison of log2 intensities between control cells; the first
# column (peptide) is dropped and the lower panels use small, faint points.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()

# Per-peptide summary of log2 intensity. Note that the `sd` column holds the
# standard deviation divided by the mean, not the raw standard deviation.
meanSD <- pepTidy %>%
  group_by(peptide) %>%
  summarise(
    mean = mean(log2(intensity), na.rm = TRUE),
    sd = sd(log2(intensity), na.rm = TRUE) / mean
  )
# Scatter of the two summaries with a quadratic fit overlaid.
ggplot(meanSD, aes(x = mean, y = sd)) +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ poly(x, 2))

# Quadratic (degree-2 polynomial) fit of the `sd` column against `mean`.
model <- lm(
  formula = sd ~ poly(mean, degree = 2),
  data = meanSD
)

Variance and Co-variance Between Cells After Cell Normalization

# Cell normalization: divide every intensity by the median intensity of its
# cell. The grouping is dropped afterwards so the plotting and reshaping steps
# that follow receive a plain, ungrouped table.
pepTidy <- pepTidy %>%
  group_by(cell) %>%
  mutate(intensity = intensity / median(intensity, na.rm = TRUE)) %>%
  ungroup()

# Density of log2 intensity per cell after the per-cell median normalization.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

# Rebuild the wide control-sample table (peptide rows, cell columns) from the
# cell-normalized intensities.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise log2 comparison of the control cells after cell normalization;
# the first column (peptide) is excluded from the plot matrix.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()

Variance and Co-variance Between Cells After Peptide Batch Normalization

# Peptide batch normalization: within each run, divide every intensity of a
# peptide by that peptide's median intensity in the run.
pepTidy <- pepTidy %>%
  group_by(run, peptide) %>%
  mutate(intensity = intensity / median(intensity, na.rm = TRUE)) %>%
  ungroup()

# Density of log2 intensity per cell after the per-run peptide normalization.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

# Rebuild the wide control-sample table (peptide rows, cell columns) from the
# batch-normalized intensities.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise log2 comparison of the control cells after peptide batch
# normalization; the first column (peptide) is excluded.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()

There are

Variance and Co-variance Between Cells After Removing Peptides Outside of 2*sd

# Keep only the measurements whose absolute log2 intensity lies within two
# standard deviations, where the standard deviation is taken over all log2
# intensities in pepTidy. The filtered table is stored under its own name so
# the density plot below draws the filtered data, while pepTidy itself is
# left untouched for any later code.
pepFiltered <- pepTidy %>%
  filter(abs(log2(intensity)) < 2 * sd(log2(pepTidy$intensity), na.rm = TRUE))
pepFiltered
## # A tibble: 195,387 x 7
##    cell       run   label condition peptide               Uniprot intensity
##    <chr>      <chr> <chr> <chr>     <chr>                 <chr>       <dbl>
##  1 ctrl_126_… ctrl  126   ctrl      yGkDATNVGDEGGFAPNILE… Q7SZ25      0.682
##  2 ctrl_126_… ctrl  126   ctrl      aWVWNTYADYADELPkPELL… O42193      1.41 
##  3 ctrl_126_… ctrl  126   ctrl      lSGVSLSSDAFFPFkDNLER… Q6ING0      1.06 
##  4 ctrl_126_… ctrl  126   ctrl      lSGVSLSSDAFFPFkDNLER… Q6ING0      0.926
##  5 ctrl_126_… ctrl  126   ctrl      iVATTLNTPELFDEWRDNVk… Q7ZTK9      0.956
##  6 ctrl_126_… ctrl  126   ctrl      iLSEENSDFSVNLFNQLSTE… Q00387      0.987
##  7 ctrl_126_… ctrl  126   ctrl      eTVVEVPQVTWEDIGGLEDV… P23787      0.869
##  8 ctrl_126_… ctrl  126   ctrl      vLAIAVETDYSFPLADkVk_a Q8AVI3      1.09 
##  9 ctrl_126_… ctrl  126   ctrl      aLAYQNPQVGVLENLHAAAY… Q6DE33      0.853
## 10 ctrl_126_… ctrl  126   ctrl      nYPVVSIEDPFDQDHWEAWT… Q7SZ25      1.27 
## # … with 195,377 more rows
# Density of log2 intensity per cell, using only the retained measurements.
ggplot(pepFiltered, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

The standard deviation of all peptides across eggs was calculated, and every peptide that had a value outside of 0.683984 was removed.

if(

)